library(caret)
library(tidyverse)
library(yardstick)

# Set the working directory to the folder containing this script, and fix the
# randomization seed.
# The script location is looked up through rstudioapi, which only works inside
# an RStudio session with a saved document. When that is not the case (e.g. the
# script is run with Rscript), keep the current working directory instead of
# failing; the relative paths used below are then resolved against it.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  } else {
    message("Script path unavailable; using working directory: ", getwd())
  }
} else {
  message("RStudio not available; using working directory: ", getwd())
}
set.seed(65)

#Note that this file should be run after the script random_forest_splitdata_fl.R, which will clean and divide the data
#The data subset in the file FL_validate_20.csv is created in the random_forest_splitdata_fl.R file.

# Load the saved model object(s) into the workspace.
# The prediction step below uses an object named `fit_classweights`, so stop
# here with a clear message if it is not available after loading.
load("random_forest_model_object.Rdata")
if (!exists("fit_classweights")) {
  stop(
    "`fit_classweights` was not found after loading ",
    "random_forest_model_object.Rdata",
    call. = FALSE
  )
}

# Load the validation data
fl_valid <- read_csv("data/FL_validate_20.csv")


# Predict a race label for every validation row with the loaded model and
# store it next to the original columns.
fl_valid$rf.race <- predict(fit_classweights, newdata = fl_valid)

# Save the validation data together with the model predictions.
write_csv(fl_valid, file = "data/fl_valid_predicted.csv")

# F1 score of the model predictions against self-reported race.
# yardstick requires `truth` and `estimate` to be factors with identical
# levels, while read_csv() returns sr.race as character. Build both factors on
# one shared level set: the prediction levels first, followed by any
# self-reported values that are not among them. The scoring is done on a
# scratch table so fl_valid itself is left unchanged.
# NOTE(review): rows with sr.race == "Unknown" are not excluded here, unlike
# in the BISG comparison later in the script -- confirm that is intended.
rf_levels <- union(
  levels(as.factor(fl_valid$rf.race)),
  sort(unique(as.character(fl_valid$sr.race)))
)
rf_eval <- tibble(
  sr.race = factor(fl_valid$sr.race, levels = rf_levels),
  rf.race = factor(fl_valid$rf.race, levels = rf_levels)
)
f_meas(rf_eval, truth = sr.race, estimate = rf.race)

# Derive one race label per row from the pred.* columns (presumably the BISG
# class probabilities -- confirm against the data-preparation script): the
# category whose value is strictly greater than all four others. Rows with a
# tie for the maximum, or with a missing pred.* value, match no condition and
# are left as NA.
fl_valid <- mutate(
  fl_valid,
  bisg.race = case_when(
    pred.whi > pmax(pred.bla, pred.lat, pred.asi, pred.oth) ~ "White",
    pred.bla > pmax(pred.whi, pred.lat, pred.asi, pred.oth) ~ "Black",
    pred.lat > pmax(pred.whi, pred.bla, pred.asi, pred.oth) ~ "Hispanic",
    pred.asi > pmax(pred.whi, pred.bla, pred.lat, pred.oth) ~ "Asian",
    pred.oth > pmax(pred.whi, pred.bla, pred.lat, pred.asi) ~ "Other"
  )
)

# F1 score of the derived labels against self-reported race, leaving out rows
# whose self-reported race is "Unknown".
# yardstick requires truth and estimate to share identical factor levels, so
# both columns are converted using one sorted level set built from the values
# present in either column (sort() drops NA, so NA never becomes a level).
bisg_eval <- dplyr::filter(fl_valid, sr.race != "Unknown")
bisg_levels <- sort(union(
  as.character(bisg_eval$sr.race),
  bisg_eval$bisg.race
))
bisg_eval <- mutate(
  bisg_eval,
  sr.race = factor(sr.race, levels = bisg_levels),
  bisg.race = factor(bisg.race, levels = bisg_levels)
)
f_meas(bisg_eval, truth = sr.race, estimate = bisg.race)
